ISA DeepData

This notebook processes data from the International Seabed Authority (ISA) DeepData database into Darwin Core archives. The resulting datasets are hosted at https://datasets.obis.org/hosted/isa/index.html.

The code for this notebook is hosted at https://github.com/iobis/notebook-deepdata.

Data flow

Fetching the data from the ISA server

The DeepData dataset is delivered to OBIS via FTP. Let’s list the files on the FTP server and download the most recent JSON file to a temporary directory. Credentials are stored in env.txt.

# Load dependencies with library() so that a missing package fails right away
# (require() only returns FALSE and lets the script continue).
library(RCurl)
library(stringr)
library(dplyr)

# The FTP URL (including credentials) is read from env.txt.
readRenviron("env.txt")
url <- Sys.getenv("deepdata_url")
if (!nzchar(url)) {
  stop("Environment variable deepdata_url is not set")
}

# Request a names-only directory listing from the server.
res <- getURL(url, verbose = TRUE, ftp.use.epsv = FALSE, dirlistonly = TRUE)

# Split on LF or CRLF so no stray carriage return ends up in a file name
# (it would otherwise be pasted into the download URL below).
filenames <- unlist(strsplit(res, "\\r?\\n"))

# Keep only names that end in ".json": the dot is escaped and the pattern is
# anchored, so names such as "xjson" or "data.json.bak" are not matched.
filenames <- sort(filenames[str_detect(filenames, "\\.json$")], decreasing = TRUE)

# Exactly one JSON file is expected on the server; anything else is an error.
if (length(filenames) != 1) {
  stop("Unexpected number of files found")
}

# Download the file to a temporary location for parsing.
file_url <- paste0(url, "/", filenames[1])
temp_file <- tempfile(pattern = "deepdata_", tmpdir = tempdir(), fileext = ".json")
download.file(file_url, temp_file)

Parsing the JSON file

Earlier versions of the file were encoded in ISO-8859-1 rather than UTF-8 (the standard encoding for JSON), which required reading the file with readLines before parsing it with the jsonlite package, but that seems to be fixed now.

library(jsonlite)
library(purrr)

# Read the downloaded file as UTF-8 text through a single connection, and
# close that same connection once the lines have been read.
con <- file(temp_file, encoding = "UTF-8")
lines <- readLines(con, warn = FALSE)
close(con)

# Parse the JSON text and take the element named "DEEPDATA" as a tibble.
records <- fromJSON(lines, simplifyDataFrame = TRUE)$DEEPDATA %>%
  as_tibble()

Generating Darwin Core data files

We can now extract a list of distinct datasets from the data frame, and generate a Darwin Core archive for each dataset. I’m using group_indices() to assign a unique dataset identifier to each record.

# Give every record the index of its Metadata group as a dataset identifier.
# Grouping is done with group_by() first, because passing grouping columns
# directly to group_indices() is deprecated since dplyr 1.0.0.
dataset_ids <- records %>%
  group_by(Metadata) %>%
  group_indices()
records$dataset_id <- dataset_ids

Let’s also take a look at some titles and abstracts.

# Distinct dataset titles, in order of first appearance.
titles <- unique(records$Metadata$title)

titles
## [1] "Biological Sampling"
# Distinct dataset abstracts, in order of first appearance.
abstracts <- unique(records$Metadata$abstract)

abstracts
##  [1] "Sampling data captured in Oceanic Exploration Research In 2017, Ifremer started work on the geostatistical study for more precise estimation of the mineral resources in the contract area, in line with ISBA recommendations (ISBA/21/LTC/15). The study is being carried out in collaboration with RSC Mining & Mineral Exploration Ltd. (RSC). The study is still ongoing and is slated for completion in early 2018"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [2] "Sampling data captured in Oceanic Exploration Research The China Ocean Mineral Resources Research and Development Association (hereinafter referred to as COMRA) conducted its activities in 2017 for implementing the program of activities for the current five-year program of activities specified in Schedule 2 of Annex III to the contract for exploration of cobalt-rich ferromanganese crusts between the International Seabed Authority (hereinafter referred to as ISA) and the China Ocean Mineral Resources Research and Development Association. In accordance with the provisions specified in Section 10 of the Annex II to the contract, COMRA, as the contractor, submits to the Secretary-General of ISA this annual report covering its program of activities carried out in the year of 2017."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [3] "Sampling data captured in Oceanic Exploration Research One objective of the exploration campaign in 2016 was to increase the box corer sampling density in a nodule field of high economic potential (prospective area #1 [PA1] in the eastern German license area) called sub-cluster CL11 (231 km2) in order to (1) determine its reservoir content in more detail, and (2) investigate whether and to what extent resource assessment can be improved (and the efficiency of exploration planning increased) by determining the optimal number of box core samples required per target area. For the analysis of nodule abundances obtained from box core samples, the geostatistical methods variography and ordinary block kriging were used."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [4] "Sampling data captured in Oceanic Exploration Research Based mainly on nodule abundances in 221 box core samples and the hydro-acoustic data obtained during seven cruises to the German license area, resource modelling using artificial neural network statistics was carried out for the entire license area of 75,000 km2 size, and in particular for four prospective potential mining areas in the eastern license area with a total size of 1038 km2. According to these assessments, the entire license area contains 620 million metric tonnes of nodules (dry weight) with 195 million t of Mn, 8.7 million t of Ni, 7.3 million t of Cu, 1.6 million t of Ti, and 1.1 million t of Co. Results of geostatistical analyses show that the mean nodule abundance in the four prospective areas in the eastern license area (“sub-clusters”) varies between 17.7 and 23.1 kg/m2 and the resources here amount to 14 million metric tonnes of nodules in total (dry weight), containing 4.4 million t of Mn, 198,000 t of Ni, 165,000 t of Cu, 36,000 t of Ti, and 24,000 t of Co."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [5] "Sampling data captured in Oceanic Exploration Research Based on geochemical and physical data we identified differences in the polymetallic nodule facies within the eastern German license area. Nodules in Prospective Areas # 1 are generally larger and show significantly higher contents of Ni and Cu as compared to the nodules in Prospective Area #2 which are smaller and are characterised by higher contents of Co. Furthermore, our study shows that sediment, which mainly consists of Si and Al oxides, contributes 13 % to the dry weight of the nodules on average. The method we used is quantitative and can thus be used for habitat mapping and be related to biodiversity and other environmental parameters"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [6] "Sampling data captured in Oceanic Exploration Research This report is submitted in accordance with Section 10 of Standard Clauses of the Contract contained in Appendix II to the Contract for exploration for cobalt-rich ferromanganese crusts, as concluded between the International Seabed Authority and the Ministry of Natural Resources and Environment of the Russian Federation on March, 10th, 2015. The report contains information about the results of activities concerning the study of the seabed cobalt-rich ferromanganese crusts (CRC) carried out during the first year of the first five-year period within the exploration area according to the Plan of the Exploration Activities, approved by the Council in July 2014 (Appendix 1 to the Contract for exploration of CRC). The main activities were carried out in the following directions: - Exploration activities; - Environment baseline studies."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [7] "Sampling data captured in Oceanic Exploration Research Ministry of Oceans and Fisheries authority of the Government of the Republic of Korea hereby submition to the International Seabed Authority the annual report on the activities in 2019 conducted in accordance with the Programme of Activities of the Contract for Exploration."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [8] "Sampling data captured in Oceanic Exploration Research In accordance with Section 10, Appendix II of the Contract for Exploration, Deep Ocean Resources Development Co., LTD (DORD) hereby submits to the International Seabed Authority (ISA) the Annual Report of 2016 (the 1st year of the 4th 5-year plan) on exploration of polymetallic nodule. This report contains the results of the analysis work done in 2016 on resource survey and environmental studies, the results obtained by the research and development on mining systems and metallurgical processing and some considerations for economic assessment, and also contains the data on the environment studies obtained during the survey cruise conducted in 2016. "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
##  [9] "Sampling data captured in Oceanic Exploration Research In accordance with Section 10, Appendix II of the Contract for Exploration, Deep Ocean Resources Development Co., Ltd. (DORD) hereby submits to the International Seabed Authority (ISA) the Annual Report for 2018 (the 3 rd year of the 5-year plan (June 2016 – June 2021)) on the ex ploration of polymetallic nodules. This report contains our activities conducted in 2018 as follows: Survey cruise in DORD’s license area from 18 September to 8 October 2018 Geological analysis on the resource data collected from th e cruise surveyconducted in 2017; Environmental analysis on samples collected from the cruise survey conductedin 2017; Consideration on a collector system Research and development on mining systems and metallurgical processing Economic assessment Training Programme Other items included in this report are a certified financial statement of actual and direct exploration expenditure and a program of activities for the following year."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [10] "Sampling data captured in Oceanic Exploration Research The report is compiled in accordance with Section 10 of the standard clauses for exploration contract specified in Annex III to the Agreement between the International Seabed Authority and the Interoceanmetal Joint Organization concerning the Extension of the Contract for Exploration for Polymetallic Nodules between the International Seabed Authority and Interoceanmetal Joint Organization (hereafter referred to as the Agreement). The report includes information concerning the research and exploration work, executed by IOM in 2018, in accordance with the Program of Activities for the Extension Period (starting from 2016) as specified in Annex I to the Agreement."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [11] "Sampling data captured in Oceanic Exploration Research The underlying report has been prepared in accordance with Chapter 10 of Standard contractual conditions specified in the Appendix II to the Contract for exploration entered between the International Seabed Authority (ISA) and the Joint Interoceanmetal organisation (IOM) on 29 March 2001. The report has been prepared according to the guidelines of Legal and Technical Commission of the International Seabed Authority, acting on the basis of Rule 39 of the Rules for the exploration and prospecting for polymetallic nodules in the Area, “Recommendations for the guidance of contractors on the content, format and structure of annual reports” (ISBA/21/LTC/15)."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [12] "Sampling data captured in Oceanic Exploration Research Pursuant to Section 10, Appendix II of the Contract for Exploration between the International Seabed Authority and the Government of the Republic of Korea (ISBA/7/C/4), the Ministry of Land, Transport and Maritime Affairs (MLTM) that is the relevant authority of the Government of the Republic of Korea hereby submits to the International Seabed Authority the annual report on the exploration activities in 2011 conducted in accordance with Paragraph 5, Schedule 2 on the Programme of Activities of the Contract for Exploration.       The Contractor had performed the geological and geophysical survey with sampling of sediments and nodules, the marine environmental baseline survey in and around the contract area focused on the potential benthic impact experiment and reference sites, and research and development works on mining and processing technologies as a part of its exploration activities in 2011. "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [13] "Sampling data captured in Oceanic Exploration Research In 2017 (Year 4 of the exploration contract), a multidisciplinary offshore campaign named “GSRNOD17” was conducted by Global Sea Mineral Resources NV (GSR), subsidiary company of the DEME‐Group, in its offshore claim area (Clarion‐Clipperton Zone – CCZ), pursuant to the exploration contract concluded with the International Seabed Authority (ISA). The expedition took place on board of the MV Topaz Captain, from mid‐May to early July 2018 (42 days offshore), involving 52 persons on board (crew, engineers, scientists and experts) but also a significant onshore support. "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [14] "Sampling data captured in Oceanic Exploration Research China Ocean Mineral Resources Research and Development Association (COMRA) conducted its activities during 2017 according to the Second Five-Year Plan specified in Appendix II to the Polymetallic Sulphides Exploration Contract (hereinafter referred to as the Exploration Contract) between the International Seabed Authority (ISA) and COMRA. In accordance with the provisions specified in Section 10 of the Exploration Contract, COMRA, as the contractor, submits to the Secretary-General of ISA this Annual Report covering its program of activities carried out in the year of 2017."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [15] "Sampling data captured in Oceanic Exploration Research The China Ocean Mineral Resources Research and Development Association (hereinafter referred to as COMRA) conducted its activities during 2018 for implementing the programme of activities set forth in the Working Programme in the Extension Five-year Period specified in the Application for Extension of the Contract for Exploration of Polymetallic Nodules (hereinafter referred to as the Extension Application). In accordance with the provisions specified in Section 10 of Annex IV to the Regulations, COMRA, as the contractor, submits to the Secretary-General of the ISA, this Annual Report covering its programme of activities for the year of 2018."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [16] "Sampling data captured in Oceanic Exploration Research This report is submitted in accordance with Section 10 of the Standard Clauses of the Contract, contained in Appendix II to the Contract for Exploration for polymetallic nodules, as concluded between the International Seabed Authority and SSC FSUGE Yuzhmorgeologiya on March 29, 2001. The report contains information about the results of activities carried out by SSC FSUGE Yuzhmorgeologiya in 2015 in the seabed polymetallic nodule Exploration Area according to the Plan of Exploration Activities, approved by the Council in August 1997 (Appendix 1 to the Contract). During the period under review in 2015, SSC FSUGE Yuzhmorgeologiya carried out the activities that were the constituent part of the Programme of Activities for the third five-year period (2011-2015), included into the Contract for Exploration."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [17] "Sampling data captured in Oceanic Exploration Research This report is submitted in accordance with Section 10 of the Standard Clauses of the Contract, contained in Appendix II to the Contract for exploration for polymetallic nodules, as concluded between the International Seabed Authority and Contractor Yuzhmorgeologiya on March 29, 2001. The report contains information about the results of activities carried out by Contractor Yuzhmorgeologiya in 2016 in the Exploration Area for polymetallic nodules. The Exploration Activities were carried out as provided by the Plan of Exploration Activities, approved by the Council in August 1997 (Appendix 1 to the Contract). The main activities under the Contract in 2016 consisted of the investigations of one Site (B7) within the Eastern Polygon of the Russian Exploration Area for polymetallic nodules (REA) of Contractor Yuzhmorgeologiya."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [18] "Sampling data captured in Oceanic Exploration Research UK Seabed Resources Ltd (UKSR) is pleased to submit this annual report on activities with respect to the Plan of Work associated with both of its 15-year International Seabed Authority (ISA) Exploration Licences. The first licence entered into effect on 8 February 2013 (UK1); the second on 29 March 2016 (UK2). UKSR has integrated together the exploration programmes of UK1 and UK2 in order to provide UKSR and the ISA and its stakeholders a broader insight into CCZ-wide ecosystem structure and function. This report describes UKSR’s activities and particular achievements for the calendar year 2017 with specific reference to UK, and also its plans for the future."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [19] "Sampling data captured in Oceanic Exploration Research During GSRNOD14A in 2014, over 80 000 km² was surveyed with multi‐beam echo sounding (MBES). Besides that, 10 boxcore samples were collected from which sampling tubes for further geotechnical testing were achieved. After analysis of these data, 3 Areas of Interest (AoIs B) were selected for detailed investigation during GSRNOD15A in 2015."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [20] "Sampling data captured in Oceanic Exploration Research UK Seabed Resources Ltd (UKSR) is pleased to submit this annual report on activities with respect to the Plan of Work associated with both of its 15-year International Seabed Authority (ISA) Exploration Licences. The first licence entered into effect on 8 February 2013 (UK1); the second on 29 March 2016 (UK2). UKSR has integrated together the exploration programmes of UK1 and UK2 in order to provide UKSR and the ISA and its stakeholders a broader insight into CCZ-wide ecosystem structure and function. This report describes UKSR’s activities and particular achievements for the calendar year 2016, and also its plans for the future."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [21] "Sampling data captured in Oceanic Exploration Research 2015 was the last full year of activities under the polymetallic nodule exploration contract due to expire on 19 June 2016. The year was marked by the activities and work carried out under the third five-year programme of activities, as well as the preparation of the new five-year programme of activities proposed in the application for extension of an approved plan of work for exploration for polymetallic nodules submitted to the International Seabed Authority on 16 December 2015 in accordance with the modalities set forth in decision ISBA/21/C/19."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [22] "Sampling data captured in Oceanic Exploration Research Ifremer continued to implement its training programme in 2018 by hosting a Jamaican intern at its Deep-Sea Environment Laboratory (Laboratoire Environnement Profond) in Brest. The internship focused on the morphological identification and barcoding of samples from the Clarion Clipperton Zone. The Institute pursued its international involvement by contributing to initiatives undertaken by the Authority and continuing with its research partnerships, notably by participating in the “Mining Impact” project, a joint European and JPI Oceans initiative, organising the NAPOLEON (« NemAtodes from deeP-sea pOLymEtallic nOdules: implications to improve miNing pratices ») workshop in which two DORD representatives participated."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [23] "Sampling data captured in Oceanic Exploration Research Les travaux d’exploration pour l’année 2018 se sont focalisés sur la finalisation de l’étude géostatistique et la mise à jour de l’évaluation des ressources en nodules polymétalliques du contrat avec la société RSC Mining & Mineral Exploration Ltd (RSC). "                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [24] "Sampling data captured in Oceanic Exploration Research JOGMEC has implemented the following work as crust exploration activities during 2018: (1) Exploration work During 2018, two surveys were conducted on the R/V Hakurei in the exploration area. Multi Beam Echo Sounder (MBES) acoustic survey, Benthic Multi-coring System (BMS) drilling, Arm-bolted Rock Dredge (AD) rock sampling were carried out for resource evaluation. The first survey targeted the JA04 Seamount, JA06 Seamount (aka the Xufu Guyot), the JA12 Seamount (aka the Zhinyu Guyot) and the JA17 Seamount (aka the Scripps Guyot). The period of the first survey was from 5 April to 5 May (31 days). The second survey targeted the JA02 Seamount (aka the Lamont Guyot), the JA04 Seamount and the JA06 Seamount. The period of second survey was from 11 May to 11 June (32 days). High-resolution bathymetry data of the JA04 and JA12 Seamounts, 84 drilling cores in 31 blocks and 394 kg of rock samples were acquired during the exploration survey in 2018. Chemical analysis for resource evaluation and lithofacies similarity analysis to compare characteristics of crust ores were carried out on drilling core samples. Moreover, statistical analysis between acoustic backscattering strength and seafloor observation was carried out to evaluate the distribution of crust ores on seamounts in the  exploration area. 
"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                                                                                           
## [25] "Sampling data captured in Oceanic Exploration Research UK Seabed Resources Ltd (UKSR) is pleased to submit this annual report on activities with respect to the Plan of Work associated with its 15-year International Seabed Authority (ISA) Exploration Licence that entered into effect on 8 February 2013 (UK1). This report describes UKSR’s activities and particular achievements, for the calendar year 2015, and also its plans for the future. UKSR’s first major achievement in 2015 was its second exploration cruise. This 47-day cruise was a joint effort with Ocean Mineral Singapore Pte Ltd (OMS), whose contract area is adjacent to that of UKSR. The joint effort enabled the integration of a contingent of scientists from the National University of Singapore into both the on-board science crew and also in the subsequent processing of the samples and data collected. The cruise mobilisation began on 9 February and demobilisation was complete on 26 March. Of the 47 days, 33 days were on station. In addition to extensive sample and data collection work in both the UKSR and OMS areas, UKSR was very pleased to have collected data in an Area of Particular Environmental Interest (APEI) area 4. UKSR has now spent a total of 50 days on station in the eastern CCZ. The second major achievement has been the ongoing publishing of scientific results in peer reviewed scientific literature based on the samples and data obtained from the first (October 2013) cruise. The samples from the first and now second cruise are the basis for extensive laboratory work at leading research facilities around the world. An entire session, comprised of 14 separate papers at the tri-annual Deep Sea Biology Symposium (31 August – 4 September 2015 in Aveiro, Portugal) was dedicated to reporting on the work and results from the October 2013 cruise. There are now a total of 34 papers published or in review for publication that reflect data gleaned from samples and data taken on the two UKSR cruises. 
The third major achievement in 2015 was the beginning of the course of study for 2 students in UKSR’s training programme – a fully sponsored doctoral programme at Plymouth University. The student from South Africa arrived and began her programme of study on predictive habitat modelling in the deep sea. UKSR believes predictive habitat modelling may be an important contribution in determining whether protected areas are representative of those where polymetallic nodule collection might take place. The student from the Solomon Islands is due shortly, having been delayed by administrative issues. He will study the nodules themselves using advanced analytic techniques, and also potential novel metallurgic processing methods. On 29 March 2016 UKSR entered into its second exploration contract (UK2) with the International Seabed Authority for polymetallic nodules. This second exploration area is 74,919 km2 in the central CCZ, and UKSR intends to integrate together the exploration programme of this second contract area with that of the first area in the eastern CCZ. This will give the ISA and its stakeholders a broader insight into CCZ-wide ecosystem structure and function. With respect to challenges, in our view, the main challenges are to ensure that i) an enabling exploitation code is developed on a timely basis, based on consultations with the full scope of stakeholders (contractors, civil society/NGOs, ISA Member States), and ii) the exploitation code reflects the rapidly growing environmental data set and knowledge being produced by the scientific teams, sponsored by the ISA CCZ contractors."
## [26] "Sampling data captured in Oceanic Exploration Research UK Seabed Resources Ltd (UKSRL) is pleased to submit this annual report on activities with respect to the Plan of Work associated with its 15-year International Seabed Authority (ISA) Exploration Licence that entered into effect on 8 February 2013. This report describes the activities of the calendar year 2014."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [27] "Sampling data captured in Oceanic Exploration Research The China Ocean Mineral Resources Research and Development Association (hereinafter referred to as COMRA) conducted its activities in 2016 for implementing the program of activities for the current five-year program of activities specified in Schedule 2 of Annex III to the contract for exploration of cobalt-rich ferromanganese crusts between the International Seabed Authority (hereinafter referred to as ISA) and the China Ocean Mineral Resources Research and Development Association. In accordance with the provisions specified in Section 10 of the Annex II to the contract, COMRA, as the contractor, submits to the Secretary-General of ISA this annual report covering its program of activities carried out in the year of 2016."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [28] "Sampling data captured in Oceanic Exploration Research The China Ocean Mineral Resources Research and Development Association (hereinafter referred to as COMRA) conducted its activities in 2015 for implementing the program of activities for the current five-year program of activities specified in Schedule 2 of Annex III to the contract for exploration of cobalt-rich ferromanganese crusts between the International Seabed Authority (hereinafter referred to as ISA) and the China Ocean Mineral Resources Research and Development Association. In accordance with the provisions specified in Section 10 of the Annex II to the contract, COMRA, as the contractor, submits to the Secretary-General of ISA this annual report covering its program of activities carried out in the year of 2015."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [29] "Sampling data captured in Oceanic Exploration Research Pursuant to Section 10, Appendix II of the Contract for Exploration between the International Seabed Authority and the Government of the Republic of Korea (ISBA/7/C/4), the Ministry of Oceans and Fisheries that is the relevant authority of the Government of the Republic of Korea hereby submits to the International Seabed Authority the annual report on the exploration activities in 2013 conducted in accordance with Paragraph 5, Schedule 2 on the Programme of Activities of the Contract for Exploration."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [30] "Sampling data captured in Oceanic Exploration Research Pursuant to Section 10, Appendix II of the Contract for Exploration between the International Seabed Authority and the Government of the Republic of Korea (ISBA/7/C/4), the Ministry of Land, Transport and Maritime Affairs (MLTM) that is the relevant authority of the Government of the Republic of Korea hereby submits to the International Seabed Authority the annual report on the exploration activities in 2010 conducted in accordance with Paragraph 5, Schedule 2 on the Programme of Activities of the Contract for Exploration. The Contractor had performed the geological and geophysical survey with sampling of sediments and nodules, the marine environmental baseline survey in and around the contract area focused on the potential bethic impact experiment and reference sites, and research and development works on mining technologies as a part of its exploration activities in 2010."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [31] "Sampling data captured in Oceanic Exploration Research The China Ocean Mineral Resources Research and Development Association (hereinafter referred to as COMRA) conducted its activities during 2016 for implementing the programme of activities set forth in the Working Programme in the Extension Five-year Period specified in the Application for Extension of the Contract for Exploration of Polymetallic Nodules submitted by COMRA on 19th November 2015 and approved by ISBA on 18th July 2016. In accordance with the provisions specified in Section 10 of Annex IV to the Regulations, COMRA, as the contractor, submits to the Secretary- General of the International Seabed Authority (hereinafter referred to as ISBA), this Annual Report covering its programme of activities for the year of 2016."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [32] "Sampling data captured in Oceanic Exploration Research The China Ocean Mineral Resources Research and Development Association (hereinafter referred to as COMRA) conducted its activities during 2015 for implementing the program of activities for the first five-year period (2011-2015) specified in Schedule 2 to the Contract for Exploration for Polymetallic Sulphides between the International Seabed Authority (hereinafter referred to as ISA) and the China Ocean Mineral Resources Research and Development Association. In accordance with\nthe provisions specified in Section 10 of the Appendix II to the Contract, COMRA, as the contractor, submits to the Secretary-General of ISA this Annual Report covering its program of activities carried out in the year of 2015."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [33] "Sampling data captured in Oceanic Exploration Research Since being awarded its Contract for Exploration in 2015, OMS has made significant contribution and progress towards the advancement of marine scientific knowledge and the global understanding of the environment and its resources. These findings and results have been reflected and reported annually to the Secretariat in accordance with the recommendations for the guidance of Contractors on the content, format and structure of annual reports as prescribed in ISBA/21/LTC/15."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [34] "Sampling data captured in Oceanic Exploration Research The China Ocean Mineral Resources Research and Development Association (hereinafter referred to as COMRA) conducted its activities during 2014 for implementing the program of activities for the third five-year (2011-2015) period specified in the revised Schedule 2 to the Contract for Exploration between the International Seabed Authority (hereinafter referred to as the ISA) and the China Ocean Mineral Resources Research and Development Association. In accordance with the provisions specified in Section 10 of the Contract, COMRA, as the contractor, submits to the Secretary- General of ISA, this Annual Report covering its program of activities for the year of 2014. "

Extracting occurrence data

Let’s first create a new ID column. It will be used later to link measurements to occurrences, and to select records by dataset. We cannot use occurrenceID for this because its values are not unique within the dataset.

library(uuid)

# One generated UUID per record, stored in the new `id` column.
records[["id"]] <- UUIDgenerate(n = nrow(records), use.time = NA)
# Stop right here if the generator ever handed out the same value twice.
stopifnot(length(unique(records$id)) == nrow(records))

Now we can select and process the columns that will go into the occurrence table.

# Build the occurrence table from the parsed records.
#
# df: data frame holding `id`, `dataset_id` and the nested groups selected
#     below.
# Returns a tibble in which the nested groups have been flattened into plain
# columns and each column name has lost everything up to its last dot.
extract_occurrences <- function(df) {
  nested <- select(
    df,
    "id", "dataset_id", "Occurrence", "Event", "Location", "Identification",
    "Record-level", "Taxon"
  )
  flat <- jsonlite::flatten(nested)
  # The pattern is greedy, so only the part after the last dot of a flattened
  # column name is kept.
  flat <- rename_all(flat, ~str_replace(., ".*\\.", ""))
  as_tibble(flat)
}

occ <- extract_occurrences(records)

Initial cleanup of occurrence data

First clean up any escaped newlines, empty strings, and placeholder values. Also fix basisOfRecord:

library(stringr)

# Normalise the text of the occurrence table and blank out values that carry
# no information.
occ <- occ %>%
  # Treat every column as text so the string cleanup applies uniformly.
  mutate(across(everything(), as.character)) %>%
  # str_squish() turns each run of whitespace, line breaks included, into a
  # single space and trims both ends, so words that were separated only by a
  # line break stay separate. Whatever ends up empty becomes NA.
  mutate(across(everything(), ~na_if(str_squish(.x), ""))) %>%
  # Placeholder values are treated as missing.
  mutate(across(
    everything(),
    ~replace(.x, .x %in% c("indet", "Not Reported", "indet."), NA)
  )) %>%
  # basisOfRecord is overwritten with a single value for all records.
  mutate(basisOfRecord = "HumanObservation")

Let’s take a look at scientificName and scientificNameID.

# Number of records per scientificName, most frequent first.
occ %>%
  count(scientificName, name = "records") %>%
  arrange(desc(records)) %>%
  rmarkdown::paged_table()
# The same overview for scientificNameID.
occ %>%
  count(scientificNameID, name = "records") %>%
  arrange(desc(records)) %>%
  rmarkdown::paged_table()

So at least in the current version at the time of writing (May 2021) there are some quality issues for both fields.

Fixing taxonomy

Let’s try to clean up the scientific names before we do taxon matching with WoRMS. Here I’m using the gni_parse() function from the taxize package, which connects to the GNI name parser. If a name cannot be parsed, I’m keeping the original.

The first step is to create a list of all distinct names in the taxonomy columns.

# The taxonomy columns of the occurrence table.
taxonomy <- select(occ, phylum, class, order, family, genus, scientificName)
# Every distinct, non-missing value found in any of those columns.
names <- taxonomy %>%
  unlist() %>%
  unique() %>%
  na.omit()

Then pass through the name parser:

library(taxize)

# Reduce a verbatim name to its canonical form using the name parser.
#
# name: a single name string.
# Returns the canonical form reported by gni_parse(). The input is returned
# unchanged when the parser fails, does not return exactly one row, or yields
# no usable canonical form.
#
# NOTE(review): gni_parse() may be defunct in recent taxize releases (replaced
# by gn_parse()); if so every call lands in the error branch and all names
# pass through unchanged. Confirm against the installed taxize version.
clean_name <- function(name) {
  tryCatch({
    res <- gni_parse(name)
    stopifnot(nrow(res) == 1)
    # as.character(NULL)[1] is NA, so a missing column is handled like a
    # missing value.
    canonical <- as.character(res$canonical)[1]
    if (is.na(canonical) || !nzchar(canonical)) {
      name
    } else {
      canonical
    }
  },
  error = function(cond) {
    name
  })
}

# Parse every distinct name. sapply() labels each result with its input
# string, which is what allows a lookup by original name afterwards.
names_clean <- sapply(X = names, FUN = clean_name)

Now use the cleaned names for taxon matching:

library(worrms)

# Look up the LSID for a name with wm_records_names().
#
# name: a single name string.
# Returns the LSID of the first match whose match_type is "exact" or
# "exact_genus", as a character string. Returns NA_character_ when the name is
# missing or empty, when there is no such match, or when the lookup raises an
# error. A message is emitted when more than one match qualifies.
match_name <- function(name) {
  # Nothing to look up: skip the request.
  if (length(name) != 1 || is.na(name) || !nzchar(name)) {
    return(NA_character_)
  }
  tryCatch({
    res <- wm_records_names(name)
    candidates <- res[[1]]
    keep <- candidates$match_type %in% c("exact", "exact_genus")
    matches <- candidates[keep, , drop = FALSE]
    if (nrow(matches) > 1) {
      message(paste0("Multiple exact matches for ", name))
    }
    # Indexing after the conversion yields NA for zero rows or a missing
    # column, so the result is always a single character value.
    as.character(matches$lsid)[1]
  }, error = function(cond) {
    # Best effort: a failed lookup is reported as "no match".
    NA_character_
  })
}

# One lookup per cleaned name. The result keeps the element names of
# names_clean and has the same order.
lsids <- sapply(X = names_clean, FUN = match_name)

Now we need to find the lowest taxonomic level at which we find a name. Note that this will result in records with less taxonomic resolution than intended. Ideally we would only match on scientificName. First translate the taxonomy columns to LSIDs:

# lsids has one entry per element of names_clean, in the same order, and
# names(names_clean) are the names as they appear in the data. Keying the
# LSIDs by those original names lets every taxonomy cell be translated with a
# single lookup.
# Indexing lsids with the cleaned name is not reliable: sapply() carries over
# the element names of names_clean, so lsids is labelled with the original
# spelling and a name that the parser changed would come back as NA.
lsid_by_name <- setNames(unname(lsids), names(names_clean))

taxonomy_clean <- taxonomy %>%
  mutate(across(everything(), ~unname(lsid_by_name[.x])))

taxonomy_clean
## # A tibble: 55,786 x 6
##    phylum       class       order      family      genus      scientificName    
##    <chr>        <chr>       <chr>      <chr>       <chr>      <chr>             
##  1 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  2 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  3 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  4 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  5 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  6 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  7 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  8 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
##  9 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
## 10 urn:lsid:ma… urn:lsid:m… urn:lsid:… urn:lsid:m… urn:lsid:… urn:lsid:marinesp…
## # … with 55,776 more rows

Then find the most specific one for each row:

# Pick, per row, the LSID of the most specific rank that has one, walking from
# scientificName up to phylum. phylum closes the chain so that a record whose
# only match is at phylum level still receives an identifier instead of NA.
taxonomy_clean <- taxonomy_clean %>%
  mutate(best = coalesce(scientificName, genus, family, order, class, phylum))

I’ll use the resulting LSIDs to replace the provided scientificNameIDs.

# Overwrite the provided identifiers with the best LSID found for each row.
occ[["scientificNameID"]] <- taxonomy_clean[["best"]]

Let’s take another look at the top scientificName and scientificNameID after matching:

# The 30 most frequent combinations of scientificName and scientificNameID.
occ %>%
  count(scientificName, scientificNameID, name = "records") %>%
  arrange(desc(records)) %>%
  head(30) %>%
  knitr::kable()
scientificName scientificNameID records
nereus urn:lsid:marinespecies.org:taxname:106825 5516
armatus urn:lsid:marinespecies.org:taxname:575786 4006
NA NA 3462
armatus or yaquinae urn:lsid:marinespecies.org:taxname:125748 3180
na urn:lsid:marinespecies.org:taxname:123393 2626
NA urn:lsid:marinespecies.org:taxname:1135 2021
NA urn:lsid:marinespecies.org:taxname:883 1541
glabrum urn:lsid:marinespecies.org:taxname:123569 1120
bulbiceps urn:lsid:marinespecies.org:taxname:126107 958
- urn:lsid:marinespecies.org:taxname:744106 880
NA urn:lsid:marinespecies.org:taxname:1131 687
gryllus urn:lsid:marinespecies.org:taxname:101607 648
ns urn:lsid:marinespecies.org:taxname:1071 635
NA urn:lsid:marinespecies.org:taxname:1078 556
NA urn:lsid:marinespecies.org:taxname:1133 529
NA 458
Motu 0 urn:lsid:marinespecies.org:taxname:1131 423
sp1 urn:lsid:marinespecies.org:taxname:125575 402
NA urn:lsid:marinespecies.org:taxname:1102 394
NA urn:lsid:marinespecies.org:taxname:1071 381
MOTU 0 urn:lsid:marinespecies.org:taxname:883 360
Rimicaris exoculata urn:lsid:marinespecies.org:taxname:107542 342
NA urn:lsid:marinespecies.org:taxname:105 321
NA urn:lsid:marinespecies.org:taxname:123084 279
à déterminer NA 276
NA urn:lsid:marinespecies.org:taxname:1100 251
na urn:lsid:marinespecies.org:taxname:123083 230
Motu 1 urn:lsid:marinespecies.org:taxname:1131 216
NA urn:lsid:marinespecies.org:taxname:101 203
NA urn:lsid:marinespecies.org:taxname:1080 201

Extracting MeasurementOrFact data

# Build the MeasurementOrFact table from the parsed records.
#
# df: data frame holding `id`, `dataset_id` and the nested `MeasurementOrFact`
#     group.
# Returns a tibble with the nested group flattened into plain columns, column
# names reduced to the part after their last dot, text values squished, empty
# strings turned into NA, and rows without a measurementType or
# measurementValue removed.
extract_mof <- function(df) {
  df %>%
    select("id", "dataset_id", "MeasurementOrFact") %>%
    jsonlite::flatten() %>%
    rename_all(~str_replace(., ".*\\.", "")) %>%
    # Only character columns can contain "". Restricting the replacement to
    # them also keeps na_if() from being asked to compare a non-character
    # column such as dataset_id with a string, which newer dplyr versions
    # reject.
    mutate(across(where(is.character), ~na_if(str_squish(.x), ""))) %>%
    filter(!is.na(measurementType) & !is.na(measurementValue)) %>%
    as_tibble()
}

mof <- extract_mof(records)
mof
## # A tibble: 12,330 x 6
##    id                 dataset_id measurementID measurementType  measurementValue
##    <chr>                   <int> <chr>         <chr>            <chr>           
##  1 825cd708-cd4b-400…          5 MUC204516398… Relative abunda… 11.9718309859155
##  2 a3809a71-4fd9-4c6…          5 MUC204716398… Relative abunda… 0.7042253521126…
##  3 e736e7db-fbf0-4a5…          5 MUC204916398… Relative abunda… 2.8169014084507 
##  4 5c5cb3ac-b85a-41f…          5 MUC205116398… Relative abunda… 0.7042253521126…
##  5 b530dd90-7950-40d…          5 MUC205516398… Relative abunda… 31.4814814814815
##  6 0c41556e-4229-4b0…          5 MUC205716398… Relative abunda… 16.6666666666667
##  7 886189b7-553e-4b8…          5 MUC205916398… Relative abunda… 3.7037037037037 
##  8 e423b1aa-b9cb-471…          5 MUC204016398… Relative abunda… 11.9718309859155
##  9 abef55d1-a089-41c…          5 MUC204116398… Relative abunda… 5.63380281690141
## 10 6bc7cdff-456f-452…          5 MUC204316398… Relative abunda… 33.0985915492958
## # … with 12,320 more rows, and 1 more variable: measurementUnit <chr>

A large number of records appear to have empty values. To demonstrate this, let’s take a look at the most common combinations of measurementType and measurementValue:

# The 10 most frequent combinations of measurementType and measurementValue.
mof %>%
  count(measurementType, measurementValue, name = "records") %>%
  arrange(desc(records)) %>%
  head(10) %>%
  knitr::kable()
measurementType measurementValue records
Relative abundance ns 4938
Relative abundance 0 404
Relative abundance in progress 141
Relative abundance 0.09765625 78
Relative abundance 0.282485875706215 72
Relative abundance 0.01 62
Relative abundance 0.122100122100122 58
Relative abundance 0.25 56
Relative abundance 0.26525198938992 56
Relative abundance 0.03 54

Generating Darwin Core Archives

Generating EML

For demonstration purposes, I’m working with the dataset pertaining to the first record here. The EML template is read from templates/eml.xml:

library(readr)
library(glue)

# Render the EML metadata document for a dataset.
#
# df: records of one dataset; the metadata is read from the first row of the
#     nested `Metadata` column.
# Returns the contents of templates/eml.xml after interpolation by glue().
#
# NOTE: glue() resolves the template's placeholders against the local
# variables of this function (presumably firstname, lastname, organization,
# email, position, abstract, title, citation, packageid, pubdate and
# datestamp; the template is not visible here), so these names must be kept.
generate_eml <- function(df) {
  # Make free text safe for inclusion in XML element content. Without this a
  # value such as "Mining & Mineral Exploration" produces an invalid document.
  # Missing values become empty strings instead of the literal text "NA".
  xml_text <- function(x) {
    x <- as.character(x)
    x[is.na(x)] <- ""
    x <- gsub("&", "&amp;", x, fixed = TRUE)
    x <- gsub("<", "&lt;", x, fixed = TRUE)
    gsub(">", "&gt;", x, fixed = TRUE)
  }

  eml <- read_file("templates/eml.xml")
  metadata <- df$Metadata[1, ]
  contact <- metadata$Contact

  # First word is the given name, everything after it is the surname, so
  # multi-word surnames are no longer truncated to their first word.
  name_parts <- strsplit(trimws(contact$name), "\\s+")[[1]]
  firstname <- xml_text(name_parts[1])
  lastname <- xml_text(paste(name_parts[-1], collapse = " "))

  organization <- xml_text(contact$organisation)
  email <- xml_text(contact$email)
  position <- xml_text(contact$position)
  abstract <- xml_text(metadata$abstract)
  title <- xml_text(metadata$title)
  citation <- xml_text(metadata$citation)
  packageid <- "https://datasets.obis.org/deepdata"
  pubdate <- format(Sys.time(), "%Y-%m-%d")
  datestamp <- format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
  glue(eml)
}

generate_eml(records)
## <eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1"
##   xmlns:dc="http://purl.org/dc/terms/"
##   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
##   xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.1/eml.xsd"
##   packageId="https://datasets.obis.org/deepdata" system="http://gbif.org" scope="system"
##   xml:lang="eng">
## 
## <dataset>
##   <title xml:lang="eng">Biological Sampling</title>
##   <pubDate>2021-05-25</pubDate>
##   <language>eng</language>
##   <abstract>
##     <para>Sampling data captured in Oceanic Exploration Research In 2017, Ifremer started work on the geostatistical study for more precise estimation of the mineral resources in the contract area, in line with ISBA recommendations (ISBA/21/LTC/15). The study is being carried out in collaboration with RSC Mining & Mineral Exploration Ltd. (RSC). The study is still ongoing and is slated for completion in early 2018</para>
##   </abstract>
##   <keywordSet>
##     <keyword>Occurrence</keyword>
##     <keywordThesaurus>GBIF Dataset Type Vocabulary: http://rs.gbif.org/vocabulary/gbif/dataset_type.xml</keywordThesaurus>
##   </keywordSet>
##   <intellectualRights>
##     <para>This work is licensed under a <ulink url="http://creativecommons.org/licenses/by/4.0/legalcode"><citetitle>Creative Commons Attribution (CC-BY) 4.0 License</citetitle></ulink>.</para>
##   </intellectualRights>
##   <maintenance>
##     <description>
##       <para></para>
##     </description>
##     <maintenanceUpdateFrequency>unkown</maintenanceUpdateFrequency>
##   </maintenance>
##   <creator>
##     <individualName>
##     <givenName>Sheldon</givenName>
##     <surName>Carter</surName>
##     </individualName>
##     <organizationName>International Seabed Authority</organizationName>
##     <positionName>Database Manager</positionName>
##     <electronicMailAddress>scarter@isa.org.jm</electronicMailAddress>
##   </creator>
##   <metadataProvider>
##     <individualName>
##     <givenName>Sheldon</givenName>
##     <surName>Carter</surName>
##     </individualName>
##     <organizationName>International Seabed Authority</organizationName>
##     <positionName>Database Manager</positionName>
##     <electronicMailAddress>scarter@isa.org.jm</electronicMailAddress>
##   </metadataProvider>
##   <contact>
##     <individualName>
##     <givenName>Sheldon</givenName>
##     <surName>Carter</surName>
##     </individualName>
##     <organizationName>International Seabed Authority</organizationName>
##     <positionName>Database Manager</positionName>
##     <electronicMailAddress>scarter@isa.org.jm</electronicMailAddress>
##   </contact>
## </dataset>
## <additionalMetadata>
## <metadata>
## <gbif>
##   <dateStamp>2021-05-25T16:39:16+0200</dateStamp>
##   <hierarchyLevel>dataset</hierarchyLevel>
##   <citation>Paulo Bonifácio,Institut français de recherche pour l'exploitation de la mer,Mar 2015 via DeepData, International Seabed Authority</citation>
## </gbif>
## </metadata>
## </additionalMetadata>
## </eml:eml>

Generating an archive descriptor file

The archive also needs to include a meta.xml file which describes the files in the archive and their relationships.

Let’s first get a list of terms including their qualName.

library(xml2)

# List the terms declared in a core or extension definition file.
#
# url: location of the XML definition to read.
# Returns a tibble with one row per <property> element and the columns
# `name` (its "name" attribute) and `qual` (its "qualName" attribute). Both
# columns are present even when no <property> element is found, so the
# result can always be joined on `name`.
get_terms <- function(url) {
  properties <- read_xml(url) %>%
    xml_ns_strip() %>%
    xml_find_all(".//property")

  # xml_attr() is vectorised over the node set
  tibble(
    name = xml_attr(properties, "name"),
    qual = xml_attr(properties, "qualName")
  )
}

occurrence_terms <- get_terms("https://rs.gbif.org/core/dwc_occurrence_2020-07-15.xml")
mof_terms <- get_terms("https://rs.gbif.org/extension/obis/extended_measurement_or_fact.xml")

Using these we can generate a list of terms to go into the meta.xml file for each table.

# Render the archive descriptor (meta.xml) for an occurrence table and its
# measurement-or-fact extension table.
#
# occ, mof: the data frames as they will be written to the archive; only
#           their column names and column order are used.
# Returns the contents of templates/meta.xml after interpolation by glue().
#
# Relies on the global lookup tables `occurrence_terms` and `mof_terms`.
# NOTE: glue() resolves the template's placeholders against the local
# variables of this function (presumably occurrence_lines and mof_lines; the
# template is not visible here), so these names must be kept.
generate_meta <- function(occ, mof) {
  # Build the declarations for one table: the identifier tag for column 0
  # followed by a <field> line for every other column with a known term.
  field_lines <- function(df, terms, id_tag) {
    # The zero-based index is fixed before joining so that it always reflects
    # the column position in the written file.
    fields <- tibble(name = names(df), index = seq_along(names(df)) - 1L) %>%
      left_join(terms, by = "name")

    # Columns without a matching term are left undeclared instead of being
    # emitted with the bogus term "NA".
    declared <- fields %>%
      filter(index > 0, !is.na(qual))

    lines <- c(
      paste0("<", id_tag, " index=\"0\" />"),
      # sprintf() yields no line at all when there is nothing to declare
      sprintf("<field index=\"%d\" term=\"%s\"/>", declared$index, declared$qual)
    )
    paste0(lines, collapse = "\n")
  }

  occurrence_lines <- field_lines(occ, occurrence_terms, "id")
  mof_lines <- field_lines(mof, mof_terms, "coreid")

  meta <- read_file("templates/meta.xml")
  glue(meta)
}

generate_meta(occ, mof)
## <archive xmlns="http://rs.tdwg.org/dwc/text/" metadata="eml.xml">
##   <core encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
##     <files>
##       <location>occurrence.txt</location>
##     </files>
##     <id index="0" />
## <field index="1" term="NA"/>
## <field index="2" term="http://rs.tdwg.org/dwc/terms/occurrenceID"/>
## <field index="3" term="http://rs.tdwg.org/dwc/terms/catalogNumber"/>
## <field index="4" term="http://rs.tdwg.org/dwc/terms/recordedBy"/>
## <field index="5" term="http://rs.tdwg.org/dwc/terms/individualCount"/>
## <field index="6" term="http://rs.tdwg.org/dwc/terms/organismQuantity"/>
## <field index="7" term="http://rs.tdwg.org/dwc/terms/organismQuantityType"/>
## <field index="8" term="http://rs.tdwg.org/dwc/terms/sex"/>
## <field index="9" term="http://rs.tdwg.org/dwc/terms/occurrenceStatus"/>
## <field index="10" term="http://rs.tdwg.org/dwc/terms/associatedSequences"/>
## <field index="11" term="http://rs.tdwg.org/dwc/terms/occurrenceRemarks"/>
## <field index="12" term="http://rs.tdwg.org/dwc/terms/eventID"/>
## <field index="13" term="http://rs.tdwg.org/dwc/terms/eventDate"/>
## <field index="14" term="http://rs.tdwg.org/dwc/terms/eventTime"/>
## <field index="15" term="http://rs.tdwg.org/dwc/terms/year"/>
## <field index="16" term="http://rs.tdwg.org/dwc/terms/month"/>
## <field index="17" term="http://rs.tdwg.org/dwc/terms/day"/>
## <field index="18" term="http://rs.tdwg.org/dwc/terms/habitat"/>
## <field index="19" term="http://rs.tdwg.org/dwc/terms/samplingProtocol"/>
## <field index="20" term="http://rs.tdwg.org/dwc/terms/eventRemarks"/>
## <field index="21" term="http://rs.tdwg.org/dwc/terms/locationID"/>
## <field index="22" term="http://rs.tdwg.org/dwc/terms/minimumDepthInMeters"/>
## <field index="23" term="http://rs.tdwg.org/dwc/terms/maximumDepthInMeters"/>
## <field index="24" term="http://rs.tdwg.org/dwc/terms/verbatimDepth"/>
## <field index="25" term="http://rs.tdwg.org/dwc/terms/decimalLatitude"/>
## <field index="26" term="http://rs.tdwg.org/dwc/terms/decimalLongitude"/>
## <field index="27" term="http://rs.tdwg.org/dwc/terms/verbatimCoordinateSystem"/>
## <field index="28" term="http://rs.tdwg.org/dwc/terms/verbatimSRS"/>
## <field index="29" term="http://rs.tdwg.org/dwc/terms/identificationID"/>
## <field index="30" term="http://rs.tdwg.org/dwc/terms/typeStatus"/>
## <field index="31" term="http://rs.tdwg.org/dwc/terms/identifiedBy"/>
## <field index="32" term="http://rs.tdwg.org/dwc/terms/dateIdentified"/>
## <field index="33" term="http://rs.tdwg.org/dwc/terms/identificationVerificationStatus"/>
## <field index="34" term="http://purl.org/dc/terms/type"/>
## <field index="35" term="http://purl.org/dc/terms/license"/>
## <field index="36" term="http://purl.org/dc/terms/rightsHolder"/>
## <field index="37" term="http://purl.org/dc/terms/accessRights"/>
## <field index="38" term="http://purl.org/dc/terms/bibliographicCitation"/>
## <field index="39" term="http://rs.tdwg.org/dwc/terms/institutionID"/>
## <field index="40" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
## <field index="41" term="http://rs.tdwg.org/dwc/terms/taxonID"/>
## <field index="42" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
## <field index="43" term="http://rs.tdwg.org/dwc/terms/scientificNameID"/>
## <field index="44" term="http://rs.tdwg.org/dwc/terms/kingdom"/>
## <field index="45" term="http://rs.tdwg.org/dwc/terms/phylum"/>
## <field index="46" term="http://rs.tdwg.org/dwc/terms/class"/>
## <field index="47" term="http://rs.tdwg.org/dwc/terms/order"/>
## <field index="48" term="http://rs.tdwg.org/dwc/terms/family"/>
## <field index="49" term="http://rs.tdwg.org/dwc/terms/genus"/>
## <field index="50" term="http://rs.tdwg.org/dwc/terms/taxonRank"/>
## <field index="51" term="http://rs.tdwg.org/dwc/terms/taxonomicStatus"/>
##   </core>
##   <extension encoding="UTF-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.iobis.org/obis/terms/ExtendedMeasurementOrFact">
##     <files>
##       <location>extendedmeasurementorfact.txt</location>
##     </files>
##     <coreid index="0" />
## <field index="1" term="NA"/>
## <field index="2" term="http://rs.tdwg.org/dwc/terms/measurementID"/>
## <field index="3" term="http://rs.tdwg.org/dwc/terms/measurementType"/>
## <field index="4" term="http://rs.tdwg.org/dwc/terms/measurementValue"/>
## <field index="5" term="http://rs.tdwg.org/dwc/terms/measurementUnit"/>
##   </extension>
## </archive>

Bringing it all together

Now we can generate an archive for each dataset. For now I’m adding the dataset ID to the title because all records in the source data currently share a single title, so the title alone cannot tell the datasets apart.

While I’m generating datasets I’m also populating the RSS feed and creating dataset landing pages.

# Build one Darwin Core archive, one RSS item and one landing page per dataset.
# Everything is written below output/<shortname>/.

baseurl <- "https://datasets.obis.org/hosted/isa/"
item_template <- read_file("templates/rss_item.xml")
landing_template <- read_file("templates/index_dataset.html")
items <- list()
shortnames <- list()
pubdate <- format(Sys.time(), "%a, %d %b %Y %H:%M:%S %z")

for (datasetid in unique(records$dataset_id)) {

  # the first record of the dataset supplies the dataset level metadata
  dataset <- records %>%
    filter(dataset_id == datasetid) %>%
    head(1)

  # NOTE: the variables below are looked up by glue() when the item and
  # landing page templates are rendered (the templates are not visible here),
  # so their names must be kept.
  title <- paste0(dataset$Metadata$title, "_", datasetid)
  abstract <- dataset$Metadata$abstract
  # replace every whitespace character, not just the first one, so the short
  # name is always usable as a directory name and URL segment
  shortname <- str_replace_all(tolower(title), "\\s", "_")
  link <- paste0(baseurl, shortname, "/index.html")
  dwca <- paste0(baseurl, shortname, "/", shortname, ".zip")

  # clear dataset directory (recursive = TRUE also creates output/ if needed)

  dataset_dir <- file.path("output", shortname)
  unlink(dataset_dir, recursive = TRUE)
  dir.create(dataset_dir, recursive = TRUE)

  # RSS feed items

  item <- glue(item_template)
  items[[datasetid]] <- item

  # shortnames for the ISA landing page

  shortnames[[datasetid]] <- shortname

  # dataset landing page

  landing <- glue(landing_template)
  writeLines(landing, file.path(dataset_dir, "index.html"))

  # archive

  dataset_occ <- occ %>% filter(dataset_id == datasetid)
  dataset_mof <- mof %>% filter(dataset_id == datasetid)

  eml <- generate_eml(dataset)
  meta <- generate_meta(dataset_occ, dataset_mof)

  write.table(dataset_occ, file = file.path(dataset_dir, "occurrence.txt"), sep = "\t", row.names = FALSE, na = "", quote = FALSE)
  write.table(dataset_mof, file = file.path(dataset_dir, "extendedmeasurementorfact.txt"), sep = "\t", row.names = FALSE, na = "", quote = FALSE)
  writeLines(eml, file.path(dataset_dir, "eml.xml"))
  writeLines(meta, file.path(dataset_dir, "meta.xml"))

  # Zip from inside the dataset directory so the archive holds bare file
  # names, then delete the loose files. The previous working directory is
  # restored even when zipping fails, so later iterations are not affected.
  files <- c("occurrence.txt", "extendedmeasurementorfact.txt", "eml.xml", "meta.xml")
  old_wd <- setwd(dataset_dir)
  tryCatch(
    {
      zip(paste0(shortname, ".zip"), files)
      file.remove(files)
    },
    finally = setwd(old_wd)
  )

}

Data publishing

In this section all files are uploaded to an S3 bucket. A list of datasets is visible at https://datasets.obis.org/hosted/isa/index.html, and an RSS file is available for the OBIS harvester.

Generate RSS file

# Join the per-dataset RSS items into one newline-separated string.
items <- paste0(items, collapse = "\n")
rss_template <- read_file("templates/rss.xml")

# Feed-level values. glue() below resolves the template's placeholders
# against the variables in scope, so these names presumably match the
# placeholders in templates/rss.xml (not visible here).
title <- "International Seabed Authority (ISA)"
description <- "International Seabed Authority (ISA)"
link <- paste0(baseurl, "index.html")

# Render the feed and write it next to the dataset directories.
rss <- glue(rss_template)
writeLines(rss, "output/rss.xml")

Generate landing page

# Build the overall landing page: one list item per dataset, each linking to
# that dataset's own index.html.
index_template <- read_file("templates/index.html")
# `content` is presumably the placeholder used by templates/index.html (not
# visible here); glue() picks it up from the variables in scope.
content <- paste0(paste0("<li><a href=\"", shortnames, "/index.html\">", shortnames, "</a></li>"), collapse = "\n")
index <- glue(index_template)
writeLines(index, "output/index.html")

Uploading to S3

library("aws.s3")

# Upload everything below output/ to the bucket, mirroring the directory
# layout under the hosted/isa prefix.

# NOTE(review): this call names the single key "hosted/isa/"; objects stored
# under that prefix by earlier runs appear to be left in place. Confirm
# whether a prefix-wide cleanup is intended.
delete_object("hosted/isa/", bucket = "obis-datasets")
files <- list.files("output", full.names = TRUE, recursive = TRUE, include.dirs = FALSE)

for (file in files) {
  # swap only the leading "output" directory for the bucket prefix
  target <- str_replace(file, "^output", "hosted/isa")
  message(target)
  put_object(file, object = target, bucket = "obis-datasets", acl = "public-read")
}